## # A tibble: 6 x 3
## pref pop epc
## <chr> <dbl> <dbl>
## 1 Hokkaido 5384 11070
## 2 Aomori 1309 2720
## 3 Iwate 1280 2769
## 4 Miyagi 2334 4819
## 5 Akita 1023 2177
## 6 Yamagata 1123 2385
## pref pop epc
## Length:47 Min. : 574 Min. : 1360
## Class :character 1st Qu.: 1114 1st Qu.: 2543
## Mode :character Median : 1649 Median : 3556
## Mean : 2704 Mean : 5678
## 3rd Qu.: 2728 3rd Qu.: 5838
## Max. :13514 Max. :28097
plot(kadai01[,2:3],
xlab = "Tthe Number of Population (thousands)",
ylab = "Electricity Consumption (million kWh")
abline(kadai.lm)## pref pop epc resid resid_abs
## 14 Kanagawa 9127 16974 -1318.1465 1318.1465
## 13 Tokyo 13514 28097 1188.2946 1188.2946
## 11 Saitama 7261 13751 -876.1137 876.1137
## 12 Chiba 6224 11776 -814.3297 814.3297
## 34 Hiroshima 2845 6592 638.4044 638.4044
## 17 Ishikawa 1154 3159 526.7176 526.7176
## 16 Toyama 1067 2918 456.5953 456.5953
## 40 Fukuoka 5103 10825 436.4397 436.4397
## 33 Okayama 1922 4572 431.2796 431.2796
## 28 Hyogo 5537 11626 385.0152 385.0152
環境省が公表する「環境統計集(平成29年度版)」 のデータを用いて、グラフを1つ作成し、考察しなさい。枠内には、①使用データとそのURL、②プログラムソース、③グラフ、④考察の4点を記載しなさい。なお、データ整形は、必ずしもRを使用しなくても良い。必要があれば、データ整形に関する説明も、①使用データの部分に記載しなさい。
コード等をwordに貼って、ファイルを提出する
kadai.lm##
## Call:
## lm(formula = epc ~ pop, data = kadai01)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1318.15 -129.32 -35.57 174.31 1188.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 365.69731 79.91473 4.576 3.72e-05 ***
## pop 1.96411 0.02091 93.913 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 387 on 45 degrees of freedom
## Multiple R-squared: 0.9949, Adjusted R-squared: 0.9948
## F-statistic: 8820 on 1 and 45 DF, p-value: < 2.2e-16
残渣(residuals)は、実際の値と予測値の差分で計算される。
以下の図だと、予測値の青の線と実際の値である赤の線の間が残差である。
# load packages
library(modelr)
library(scales)
library(plotly)
# define the data for visualization
kadai01_plot <- kadai01 %>%
add_predictions(kadai.lm) %>%
add_residuals(kadai.lm) %>%
`names<-`(value = c("pref", "pop", "epc", "predicted_epc", "residuals")) %>%
pivot_longer(cols = c(-pref, -pop), names_to = "vars", values_to = "value")
# raw v.s. expected value
g1 <-
kadai01_plot %>%
filter(vars != "residuals") %>%
ggplot(aes(pop, value, color = vars, label = pref)) +
geom_point() +
geom_line() +
scale_x_continuous(labels = comma) +
scale_y_continuous(labels = comma) +
labs(title = "The scatter plot of the predicted value and raw data based on `kadai01`",
x = "# of population",
y = "Electricity",
color = "Name") +
theme_minimal()
ggplotly(g1)実際の値である人口の変化によって生じる残差の値を、グラフ化したのが以下の図になる。
# residual
library(ggrepel)
kadai01_plot %>%
filter(vars == "residuals") %>%
ggplot(aes(pop, value, label = pref)) +
geom_hline(yintercept = 0, colour = "black", linetype = "dashed") +
geom_point() +
geom_line() +
geom_label_repel(nudge_x = TRUE, nudge_y = TRUE, check_overlap = TRUE) +
labs(title = "The scatter plot of the residual",
x = "Pop",
y = "Residuals") +
theme_minimal()pacman::p_load("NipponMap", "sf", "ggthemes")
map <- read_sf(system.file("shapes/jpn.shp", package = "NipponMap")[1],
crs = "+proj=longlat +datum=WGS84")
map2 <-
kadai01_plot %>%
pivot_wider(id_cols = c(pref, pop), names_from = vars, values_from = value) %>%
rename(name = pref) %>%
left_join(map, by = "name") %>%
st_sf()
g3 <-
ggplot(map2, aes(fill = residuals, label = name)) +
geom_sf(size = .1) +
labs(title = "The Residuals Accross Japanese Prefectures") +
scale_fill_gradient2(
name = "Residuals",
labels = scales::number_format(big.mark = ","),
low = "#15C6E3",
mid = "white",
high = "#EB7411",
midpoint = 0) +
theme_pander()
g3